This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Optional one-off project setup, left disabled:
# dir.create("mers")
# setwd('~/./mers')

# Read the MERS case table from cases.csv in the working directory.
mers <- read.csv(file = "cases.csv")

# Note: install any required packages first, then load the libraries below.
# Overwrite the hospitalization date stored for row 890
# (presumably a manual data correction -- confirm against the source data).
mers$hospitalized[890] <- "2015-02-20"

# Preview the first six records.
head(mers, n = 6L)
## number FT KSA_case code gender age country province city district
## 1 1 2 25M M 25 Jordan Zarqa
## 2 2 30M M 30 Jordan Zarqa
## 3 3 1 40F F 40 Jordan Zarqa
## 4 4 60M M 60 Jordan Zarqa
## 5 5 29M M 29 Jordan Zarqa
## 6 6 33M M 33 Jordan Zarqa
## prior_travel hospital exposure onset hospitalized sampled reported
## 1 2012-03-21 2012-04-04
## 2 2012-03-30 2012-04-08
## 3 2012-04-02 2012-04-09
## 4 2012-04-02
## 5 2012-04-11 2012-04-15
## 6 2012-04-12 2012-04-14
## death discharged comorbidity severity outcome clinical
## 1 2012-04-25 fatal fatal fatal
## 2 CCU clinical
## 3 2012-04-19 fatal fatal fatal
## 4 subclinical
## 5 CCU clinical
## 6 CCU clinical
## old_cluster cluster Cauchemez.cluster animal_contact camel_contact HCW
## 1 A A 4 FALSE FALSE
## 2 A A 4 FALSE TRUE
## 3 A A 4 FALSE TRUE
## 4 A A 4 FALSE TRUE
## 5 A A 4 TRUE
## 6 A A 4 TRUE
## contact_with contact secondary suspected inferred notes
## 1 NA
## 2 1 health care worker TRUE TRUE NA probable
## 3 1 health care worker TRUE NA
## 4 1 health care worker TRUE TRUE NA probable
## 5 health care worker TRUE TRUE NA probable
## 6 1 health care worker TRUE TRUE NA probable
## citation
## 1 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 2 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 3 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 4 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 5 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 6 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## citation2 citation3 citation4 citation5 sequence accession patient
## 1 1
## 2 2
## 3 Jordan-N3_2012 KC776174 3
## 4 4
## 5 5
## 6 6
## speculation X X.1
## 1 NA http://promedmail.org/direct.php?id=3587349
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
# Drop row 471 from the case table (the reason is not recorded here).
mers <- mers[-471, , drop = FALSE]
#install.packages(c("lubridate", "ggplot2"))
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Parse the onset and hospitalization columns as year-month-day dates.
# Entries that cannot be parsed are reported in a warning (see below).
mers[["onset2"]] <- ymd(mers[["onset"]])
mers[["hospitalized2"]] <- ymd(mers[["hospitalized"]])
## Warning: 5 failed to parse.
# Confirm the class of the parsed onset dates.
class(mers[["onset2"]])
## [1] "Date"
# Day zero of the epidemic: the earliest non-missing onset date.
day0 <- min(mers$onset2, na.rm = TRUE)
# Number of days between each case's onset and day zero.
mers$epi.day <- as.numeric(mers$onset2 - day0)

# Each plot is stored as an object to make it easier to include all plots
# in the report.
ggplot1 <- ggplot(mers) +
  geom_bar(aes(x = epi.day)) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot1)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
# Epidemic curve with the bars filled by country.
ggplot2 <- ggplot(mers) +
  geom_bar(aes(x = epi.day, fill = country)) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot2)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
# Changed y-axis maximum to 15.
# coord_cartesian() is used rather than ylim(): ylim() sets scale limits, so
# any stacked bar segment reaching above 15 would be removed before drawing,
# whereas coord_cartesian() only zooms the view and keeps every bar intact.
ggplot3 <- ggplot(data = mers) +
  geom_bar(mapping = aes(x = epi.day, fill = country)) +
  coord_cartesian(ylim = c(0, 15)) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot3)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
# Changed colour palette.
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(RColorBrewer)
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 3.4.1
# Epidemic curve by country using colours derived from the "PRGn" palette.
# "PRGn" provides at most 11 colours, which is fewer than the number of
# countries, so its 11 colours are interpolated to one per country.
country_palette <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, "PRGn")
)(nlevels(factor(mers$country)))
ggplot4 <- ggplot(data = mers) +
  geom_bar(mapping = aes(x = epi.day, fill = country)) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  ) +
  # The fill scale is part of the stored object, so any later use of ggplot4
  # (including ggplotly) shows the changed palette.
  scale_fill_manual(values = country_palette)
# Print the static version of the plot.
ggplot4
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette PRGn is 11
## Returning the palette you asked for with that many colors
# Render the stored ggplot4 object as an interactive plotly figure.
ggplotly(ggplot4)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
# Raw infectious period: hospitalization date minus symptom-onset date.
mers$infectious.period <- with(mers, hospitalized2 - onset2)
# Check which class the infectious period has.
class(mers[["infectious.period"]])
## [1] "difftime"
# The class is "difftime", i.e. a time interval/difference.
# Convert the difftime to a plain numeric vector, with the units set to days.
mers$infectious.period <- as.numeric(mers$infectious.period, units = "days")

# Histogram of the raw infectious period (negative values included).
ggplot5 <- ggplot(mers) +
  geom_histogram(aes(x = infectious.period)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot5)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
# Same histogram with the x-axis scale limited to the range -100 to 150.
ggplot6 <- ggplot(mers) +
  geom_histogram(aes(x = infectious.period)) +
  scale_x_continuous(limits = c(-100, 150)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot6)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 740 rows containing non-finite values (stat_bin).
# Keep the calculated infectious period where it is positive and use zero
# otherwise. pmax() is the vectorised equivalent of ifelse(x < 0, 0, x);
# missing values stay NA.
# Note: negative values are replaced by 0, not dropped, so they are still
# counted in the histogram below (at zero).
mers$infectious.period2 <- pmax(mers$infectious.period, 0)
ggplot7 <- ggplot(data = mers) +
  geom_histogram(aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period (positive values only)",
    # Full data-source caption, consistent with the other plots.
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot7)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
# Change plot type to a density plot.
# The y axis of geom_density() is a density, so it is labelled accordingly,
# and the caption uses the correct hyphenated data-source URL.
ggplot8 <- ggplot(data = mers) +
  geom_density(aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Density",
    title = "Probability density for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot8)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 727 rows containing non-finite values (stat_density).
# Rather than a density plot, draw an area plot (the region under the
# binned counts is filled in).
# The data must be binned into discrete intervals first, hence stat = "bin".
ggplot9 <- ggplot(data = mers) +
  geom_area(stat = "bin", mapping = aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Area plot for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot9)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
# Now change to a dot plot.
# geom_dotplot() does its own binning and has no `stat` argument, so the
# previously ignored stat = 'bin' is removed. The title names the actual
# plot type.
ggplot10 <- ggplot(data = mers) +
  geom_dotplot(mapping = aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Dot plot for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Ignoring unknown parameters: stat
# Render the stored ggplot10 object as an interactive plotly figure.
ggplotly(ggplot10)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bindot).
# Rather than infectious period against frequency, plot infectious period
# against epidemic day, drawn as a loess-smoothed line.
# The title describes the smoothed trend (it is not a probability density).
ggplot11 <- ggplot(data = mers) +
  geom_smooth(aes(x = epi.day, y = infectious.period2), method = "loess") +
  labs(
    x = "Day",
    y = "Infectious Period",
    title = "Loess-smoothed MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot11)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 727 rows containing non-finite values (stat_smooth).
# Rather than infectious period against frequency, plot infectious period
# against epidemic day as a scatter plot coloured by country.
# The y-axis scale is limited to 0-40.
# The title describes the scatter plot (it is not a probability density).
ggplot12 <- ggplot(data = mers) +
  geom_point(aes(x = epi.day, y = infectious.period2, colour = country)) +
  ylim(0, 40) +
  labs(
    x = "Day",
    y = "Infectious Period",
    title = "MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot12)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Facet the infectious-period scatter plot: one panel per country, with the
# y-axis scale limited to 0-50.
ggplot13 <- ggplot(data = mers, mapping = aes(x = epi.day, y = infectious.period2)) +
  geom_point(mapping = aes(colour = country)) +
  facet_wrap(~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot13)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Only graph certain countries, faceted by gender (rows) and country (columns).
# Rows are kept only when gender is 'M' or 'F' and the country is one of the
# seven listed; the y-axis scale is limited to 0-50.
ggplot14 <- ggplot(
  data = subset(
    mers,
    gender %in% c("M", "F") &
      country %in% c("KSA", "Oman", "Iran", "Jordan", "Qatar", "South Korea", "UAE")
  )
) +
  geom_point(mapping = aes(x = epi.day, y = infectious.period2, colour = country)) +
  facet_grid(gender ~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period by gender and country",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot14)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Data exploration.
# Scatter plot of infectious period against epidemic day, faceted by gender
# (rows) and country (columns), for rows where gender is 'M' or 'F' and the
# country is one of the seven listed; the y-axis scale is limited to 0-50.
# NOTE(review): this is currently the same plot as ggplot14 -- confirm whether
# a different exploration was intended.
ggplot15 <- ggplot(
  data = subset(
    mers,
    gender %in% c("M", "F") &
      country %in% c("KSA", "Oman", "Iran", "Jordan", "Qatar", "South Korea", "UAE")
  )
) +
  geom_point(mapping = aes(x = epi.day, y = infectious.period2, colour = country)) +
  facet_grid(gender ~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period by gender and country",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot15)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#CALCULATE CASE FATALITY IN MERS DATASET
#IFELSE CAN BE USED TO CONSTRUCT A VECTOR WITH TRUE/FALSE WHEN PERIOD < 0
#calculated infectious period in the case where it is positive and zero otherwise
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.